Until this vignette, we only used genes features to try to identify and analyze methylation. In this new one, we propose to use methylation features, which means CGI-related features to extract methylation matrix.
In a first part, we’ll present a per-gene visualisation tool and in the second one, a wide database visualisation trough heatmap pipeline, as in the previous vignettes.
Notably, a table named cgi_pf is used, of dimension 27k * 5, processed by hand from TCGA database and giving coordinates, width and center of each CpG islands (CGI) indexed on epic database.
Firstly, we defined a plot_gene_cgi, showing CGI and probes in a given window (fixed here at [100k,100k]).
targeted_genes <- penda_superup_deregulated
features <- get_features(targeted_genes, study = trscr_lusc, up_str = 7500, dwn_str = 7500)
plot_gene_cgi <- function(selected_gene = "OTX1",
window = c(100000,100000),
meth_platform = meth_lusc$platform,
meth_data = meth_lusc$data,
cgi_platform = cgi_pf,
features_list = features){
TSS <- features_list[selected_gene,"TSS"]
## get closest cgi + cgi in range
cgi_chr <- cgi_platform[which(as.character(cgi_platform[,"chr"]) == features_list[selected_gene,1]),]
tmp <- cgi_chr[which(abs(cgi_chr[,"center"]-TSS) < mean(window)+5000),]
cgi_of_interest<- cbind(tmp,abs(tmp[,"center"]-TSS))
colnames(cgi_of_interest)<-c(colnames(tmp),"dist to TSS")
closest_cgi <-cgi_chr[which(abs(cgi_chr[,"center"]-TSS)==min(abs(cgi_chr[,"center"]-TSS),na.rm=T)),]
## get probes
tmp_probes <- dmprocr::get_probe_names(features_list[selected_gene,],
meth_platform,
"Chromosome",
"Start",
up_str=window[1],
dwn_str=window[2])
sub_epic <- meth_platform[tmp_probes, c(2,3,9)]
## cols of probes dots
cols<-sapply(rownames(sub_epic), function(probe){
if(sub_epic[probe,"Feature_Type"] == "Island")
{col = "cornflowerblue"}
if(sub_epic[probe,"Feature_Type"] == "S_Shore")
{col = "chartreuse4"}
if(sub_epic[probe,"Feature_Type"] == "S_Shelf")
{col="gold"}
if(sub_epic[probe,"Feature_Type"] == "N_Shore")
{col = "chocolate"}
if(sub_epic[probe,"Feature_Type"] == "N_Shelf")
{col = "darkslategrey"}
if(sub_epic[probe,"Feature_Type"] == ".")
{col= "black"}
return(col)
})
layout(matrix(1:2,1), respect=TRUE)
plot(sub_epic$Start, rep(1,length(sub_epic$Start)), pch=19, xlim=c(closest_cgi[["center"]]-window[1],closest_cgi[["center"]]+window[2]), cex=1.1, yaxt="n",
main = paste0("nprobes = ",nrow(sub_epic),", ncgi = ",nrow(cgi_of_interest)),
xlab="Coordinates (in bp)",
ylab="",
ylim=c(0,2),
col = cols)
legend("topleft",
c("Island","S_Shore","S_Shelf","N_Shore","N_Shelf","Opensea"),
xpd = TRUE,
inset=c(-0.34,0),
pch=20,
col=c("cornflowerblue","chartreuse4","gold","chocolate","darkslategrey","black"))
text(TSS,0.4,labels = paste0("TSS : ", TSS),cex = 0.7)
points(TSS, 0.5, pch=9, col="red")
abline(h=0.5,lty=3)
if(nrow(cgi_of_interest)>0){
apply(t(t(rownames(cgi_of_interest))),1,function(cgi){
rect(cgi_of_interest[cgi,"start"],1.4,cgi_of_interest[cgi,"end"],1.2,
col = "red",
border="black")
})
}
## Indexing probes per cgi
tmp_cgi_index <- apply(t(t(rownames(sub_epic))),1, function(probe){
foo <-apply(t(t(rownames(cgi_of_interest))),1,function(cgi){
if(sub_epic[probe,"Start"] < cgi_of_interest[cgi,"end"] & sub_epic[probe,"Start"] >= cgi_of_interest[cgi,"start"]){
rownames(cgi_of_interest[cgi,])}
})
index<-unlist(foo)
if(!is.null(index)){
names(index)<- probe
}
return(index)
})
cgi_index <- sort(unlist(tmp_cgi_index))
methvals_of_interest <- meth_data[intersect(rownames(meth_data),names(cgi_index)),]
methvals_of_interest<- methvals_of_interest[names(cgi_index),]
colors=c("cyan", "black", "red")
cols = colorRampPalette(colors)(100)
breaks <- c(seq(0,0.33,length=35),
seq(0.34,0.66,length=33),
seq(0.67,1,length=33))
image(methvals_of_interest,
axes=FALSE,
col=cols,
main=paste0("nprobes indexed : ",nrow(methvals_of_interest)),
breaks = breaks,
ylab="Patients",
xlab="probes")
mtext(paste(noquote(selected_gene),"Methylation profile",sep= " "), outer=TRUE, cex=2, line=-2)
}
plot_gene_cgi("FKBP4")
plot_gene_cgi("OTX1")
plot_gene_cgi("CDT1")
Note that probes are sorted by coordinates. Thus, the heatmap is readable from left to right. As we can see, there is no “neutral” CGI. We “loose” many probes between left and right plots due to index : we include in the heatmap only probes on CGIs. The three examples shows that there is different genes methylation profile : gene with CGI concentrated around TSS, gene with CGI distributed relatively evenly over the window (CDT1) and genes with very spread CGI (FKBP4).
To get our pipeline working, we need as usual a 2-level depth list. The following index probes per CGI features such as shore, shelf, opensea etc.
get_indexed_cgi <- function(window = c(100000,100000),
features_list = features,
cgi_platform = cgi_pf,
meth_data = meth_lusc$data,
meth_platform = meth_lusc$platform){
cgi_indexed_probes<- epimedtools::monitored_apply(t(t(rownames(features_list))),mod = 20,1,function(gene){
print(noquote(paste0("Indexing probes for gene ",gene," ...")))
chr<-features_list[gene,1]
TSS <- features_list[gene,"TSS"]
meth_platform_chr<- meth_platform[rownames(meth_platform)[meth_platform[[1]] %in% chr],]
tmp_probes <- dmprocr::get_probe_names(features_list[gene,], meth_platform_chr, "Chromosome", "Start", window[1], window[2])
## get probe for gene
sub_platform <- meth_platform_chr[tmp_probes, c("Chromosome","Start","Feature_Type")]
for (i in which(sub_platform[,3]==".")){
if(sub_platform[i,"Start"] > TSS){sub_platform[i,"Feature_Type"] <- "upper_opensea"}
if(sub_platform[i,"Start"] < TSS){sub_platform[i,"Feature_Type"] <- "lower_opensea"}
### encode "." as opensea
}
features_type_names <- unique(sub_platform[,"Feature_Type"])
foo<-apply(t(t(features_type_names)),1,function(type){
tmp_type<-assign(paste0(type),rownames(sub_platform[which(sub_platform[,"Feature_Type"]==type),]))
if(!is.list(tmp_type)){assign(paste0(type),list(tmp_type))}
else{assign(paste0(type),tmp_type)}
## create a list of probes for each level of "Feature_Type" + very very very smart trick to deal with 1 length list becoming
## character
})
for (i in 1:length(foo)){
if(is.list(foo[[i]])){foo[i]<-foo[[i]]}
}
names(foo)<- features_type_names
ret = list(sub_epic = sub_platform,
lower_opensea = foo$lower_opensea,
S_shore = foo$S_Shore,
S_shelf = foo$S_Shelf,
Island = foo$Island,
N_shelf = foo$N_Shelf,
N_shore = foo$N_Shore,
upper_opensea = foo$upper_opensea)
return(ret)
})
names(cgi_indexed_probes)<- rownames(features_list)
return(cgi_indexed_probes)
}
cgi_indexed_probes <- get_indexed_cgi()
map_cgi<-reduce_map(cgi_indexed_probes,c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
means_per_regions_per_genes_per_patient_t <- reduce_rows(meth_tumoral,map_cgi, mean ,na.rm=T)
means_t <- subset_vals_per_bins(data = meth_tumoral,
values_per_patient = means_per_regions_per_genes_per_patient_t,
fun = mean,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(means_t, main = "mean of means superup/tumoral")
sd_per_regions_per_genes_per_patient_t <- reduce_rows(meth_tumoral,map_cgi, sd ,na.rm=T)
sds_t <- subset_vals_per_bins(data = meth_tumoral,
values_per_patient = sd_per_regions_per_genes_per_patient_t,
fun = sd,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(sds_t, main = "mean of sd superup/tumoral")
rsd_per_regions_per_genes_per_patient_t <- reduce_rows(meth_tumoral,map_cgi, rsd ,na.rm=T)
rsds_t <- subset_vals_per_bins(data = meth_tumoral,
values_per_patient = rsd_per_regions_per_genes_per_patient_t,
fun = rsd,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(rsds_t, main = "mean of rsd superup/tumoral")
means_per_regions_per_genes_per_patient_h<- reduce_rows(meth_normal,map_cgi, mean ,na.rm=T)
means_h <- subset_vals_per_bins(data = meth_normal,
values_per_patient = means_per_regions_per_genes_per_patient_h,
fun = mean,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(means_h, main = "mean of means superup/healthy")
sd_per_regions_per_genes_per_patient_h <- reduce_rows(meth_normal,map_cgi, sd ,na.rm=T)
sds_h <- subset_vals_per_bins(data = meth_normal,
values_per_patient = sd_per_regions_per_genes_per_patient_h,
fun = sd,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(sds_h, main = "mean of sd superup/healthy")
rsd_per_regions_per_genes_per_patient_h <- reduce_rows(meth_normal,map_cgi, rsd ,na.rm=T)
rsds_h <- subset_vals_per_bins(data = meth_normal,
values_per_patient = rsd_per_regions_per_genes_per_patient_h,
fun = rsd,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(rsds_h, main = "mean of rsd superup/healthy")
boxplot_res(means_h,means_t)
boxplot_res(sds_h,sds_t)
boxplot_res(rsds_h,rsds_t)
means_per_regions_per_genes_per_patient_d<- reduce_rows(meth_diff,map_cgi, mean ,na.rm=T)
means_d <- subset_vals_per_bins(data = meth_diff,
values_per_patient = means_per_regions_per_genes_per_patient_d,
fun = mean,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(means_d, main = "mean of means superup/differential")
sd_per_regions_per_genes_per_patient_d <- reduce_rows(meth_diff,map_cgi, sd ,na.rm=T)
sds_d <- subset_vals_per_bins(data = meth_diff,
values_per_patient = sd_per_regions_per_genes_per_patient_d,
fun = sd,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(sds_d, main = "mean of sd superup/differential")
rsd_per_regions_per_genes_per_patient_d <- reduce_rows(meth_diff,map_cgi, rsd ,na.rm=T)
rsds_d <- subset_vals_per_bins(data = meth_diff,
values_per_patient = rsd_per_regions_per_genes_per_patient_d,
fun = rsd,
binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(rsds_d, main = "mean of rsd superup/differential")
rsds_d